package com.mpsc.spark.demo.util;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Word-count demo built on the Spark Java RDD API.
 *
 * <p>NOTE(review): {@code javaSparkContext} is referenced below but is not declared anywhere in
 * this class; a context has to be supplied (for example as a field) before the class compiles.
 */
public class SparkUtil {
    /** Input file counted by the no-argument {@link #testSparkText()}. */
    private static final String DEFAULT_INPUT_FILE = "D:\\TEMP\\word.txt";

    /**
     * Counts the words of the default input file and prints one {@code (word, count)} tuple per
     * line to standard output.
     */
    public void testSparkText() {
        testSparkText(DEFAULT_INPUT_FILE);
    }

    /**
     * Counts the words of the given text file and prints one {@code (word, count)} tuple per
     * line to standard output.
     *
     * <p>Words are separated by runs of whitespace. Blank lines and leading or repeated
     * separators do not contribute an empty "word" to the result.
     *
     * @param file path of the text file, passed unchanged to {@code textFile}
     */
    public void testSparkText(String file) {
        JavaRDD<String> fileRDD = javaSparkContext.textFile(file);

        // Splitting on a single space yields "" for blank lines and for leading or consecutive
        // spaces, which would then be counted as a word. Split on whitespace runs and drop the
        // empty tokens that can still appear at the start of a line.
        JavaRDD<String> wordsRDD = fileRDD.flatMap(
                line -> Arrays.stream(line.split("\\s+"))
                        .filter(word -> !word.isEmpty())
                        .iterator());
        JavaPairRDD<String, Integer> wordAndOneRDD = wordsRDD.mapToPair(word -> new Tuple2<>(word, 1));
        JavaPairRDD<String, Integer> wordAndCountRDD = wordAndOneRDD.reduceByKey(Integer::sum);

        // Print the results.
        List<Tuple2<String, Integer>> result = wordAndCountRDD.collect();
        result.forEach(System.out::println);
    }
}
